This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(xgboost)
library(Matrix)
library(mclust)
ds0 <- readRDS("./ds0.rds")
ds1 <- readRDS("./ds1.rds")
ds2 <- readRDS("./ds2.rds")
Idents(ds2) <- ds2$conditions
ds2_AC <- subset(ds2, idents = "AC")
ds2_PA <- subset(ds2, idents = "PA")
ds2_AC <- ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.1)
ds2_PA <- ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.1)
umapplot(ds2_AC) + scale_y_continuous(limits = c(-5,15),breaks = NULL) +
scale_x_continuous(limits = c(-5,15),breaks = NULL)
umapplot(ds2_PA)+ scale_y_continuous(limits = c(-5,15),breaks = NULL) +
scale_x_continuous(limits = c(-5,15),breaks = NULL)
AC_markers <- FindAllMarkers(ds2_AC,logfc.threshold = 0.7, min.diff.pct = 0.2)
PA_markers <- FindAllMarkers(ds2_PA,logfc.threshold = 0.7, min.diff.pct = 0.2)
AC_data <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
set.seed(7)
index <- c(1:dim(AC_data)[2]) %>% sample(ceiling(0.3*dim(AC_data)[2]), replace = F, prob = NULL)
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data[,-index],"dgCMatrix")), label = AC_label[-index])
AC_test_data <- list(data = t(as(AC_data[,index],"dgCMatrix")), label = AC_label[index])
# data(agaricus.train, package='xgboost')
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
# xgb_params_train = {
# 'objective':'multi:softprob',
# 'eval_metric':'mlogloss',
# 'num_class':self.numbertrainclasses,
# 'eta':0.2,
# 'max_depth':6,
# 'subsample': 0.6}
# nround = 200
watchlist <- list(train = AC_train, eval = AC_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, AC_train, nrounds = 200, watchlist, verbose = 0)
# 特征提取
importance <- xgb.importance(colnames(AC_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) #这个cluster和分类没有关系
multi_featureplot(head(importance,9)$Feature,ds2)
AC_genes <- head(importance, 500) ##选择top500
#混淆矩阵
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
AC_confuse_matrix_test <- table(AC_test_data$label, predict_AC_test, dnn=c("true","pre"))
AC_confuse_matrix_test_prop <- prop.table(AC_confuse_matrix_test, 1)
AC_confuse_matrix_test_prop
pre
true 0 1 2 3
0 0.994652406 0.002673797 0.002673797 0.000000000
1 0.000000000 0.987755102 0.012244898 0.000000000
2 0.000000000 0.026455026 0.962962963 0.010582011
3 0.000000000 0.009009009 0.054054054 0.936936937
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(AC_test_data$label, predict_AC_test) #多分类ROC
xgboost_roc <- pROC::roc(AC_test_data$label, predict_AC_test)
Warning in roc.default(AC_test_data$label, predict_AC_test) :
'response' has more than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc' instead
Setting levels: control = 0, case = 1
Setting direction: controls < cases
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量
PA_data <- get_data_table(ds2_PA, highvar = F, type = "data")
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
set.seed(7)
index <- c(1:dim(PA_data)[2]) %>% sample(ceiling(0.3*dim(PA_data)[2]), replace = F, prob = NULL)
colnames(PA_data) <- NULL
PA_train_data <- list(data = t(as(PA_data[,-index],"dgCMatrix")), label = PA_label[-index])
PA_test_data <- list(data = t(as(PA_data[,index],"dgCMatrix")), label = PA_label[index])
# data(agaricus.train, pPAkage='xgboost')
PA_train <- xgb.DMatrix(data = PA_train_data$data,label = PA_train_data$label)
PA_test <- xgb.DMatrix(data = PA_test_data$data,label = PA_test_data$label)
# xgb_params_train = {
# 'objective':'multi:softprob',
# 'eval_metric':'mlogloss',
# 'num_class':self.numbertrainclasses,
# 'eta':0.2,
# 'max_depth':6,
# 'subsample': 0.6}
# nround = 200
watchlist <- list(train = PA_train, eval = PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, PA_train, nrounds = 200, watchlist, verbose = 0)
selected_features <- intersect(PA_genes$Feature, AC_genes$Feature)
PA_data <- get_data_table(ds2_PA, highvar = F, type = "data")
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_train_data <- list(data = t(as(PA_data,"dgCMatrix")), label = PA_label)
PA_train <- xgb.DMatrix(data = PA_train_data$data,label = PA_train_data$label)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, PA_train, nrounds = 200, verbose = 1)
# 特征提取
importance <- xgb.importance(colnames(PA_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1)
multi_featureplot(head(importance,9)$Feature, ds2)
AC_data <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_test_data <- list(data = t(as(AC_data,"dgCMatrix")), label = AC_label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
#计算混淆矩阵
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
AC_confuse_matrix_test <- table(AC_test_data$label, predict_AC_test, dnn=c("true","pre"))
AC_confuse_matrix_test_prop <- prop.table(AC_confuse_matrix_test,1)
AC_confuse_matrix_test_prop #分析发育轨迹
pre
true 0 1 2
0 0.992635025 0.004091653 0.003273322
1 0.824879227 0.172705314 0.002415459
2 0.449922958 0.497688752 0.052388290
3 0.002762431 0.066298343 0.930939227
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(AC_test_data$label, predict_AC_test) #多分类ROC
xgboost_roc <- pROC::roc(AC_test_data$label, predict_AC_test)
Warning in roc.default(AC_test_data$label, predict_AC_test) :
'response' has more than two levels. Consider setting 'levels' explicitly or using 'multiclass.roc' instead
Setting levels: control = 0, case = 1
Setting direction: controls < cases
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量ROC
multi_featureplot(head(importance,4)$Feature, ds2, split.by = "conditions")
multi_featureplot(head(importance,9)$Feature, ds2_AC)
# 计算ARI
adjustedRandIndex(predict_AC_test, AC_test_data$label)
[1] 0.307929
AC_data <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_data <- AC_data[selected_features,]
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data,"dgCMatrix")), label = AC_label)
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
xgb_ACram <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model2 <- xgb.train(xgb_ACram, AC_train, nrounds = 200, verbose = 1)
# 特征提取
importance2 <- xgb.importance(colnames(AC_train), model = bst_model2)
head(importance2)
xgb.ggplot.importance(head(importance2,20),n_clusters = 1)
multi_featureplot(head(importance2,9)$Feature, ds2)
PA_data <- get_data_table(ds2_PA, highvar = F, type = "data")
PA_data <- PA_data[selected_features,]
PA_label <- as.numeric(as.character(Idents(ds2_PA)))
colnames(PA_data) <- NULL
PA_test_data <- list(data = t(as(PA_data,"dgCMatrix")), label = PA_label)
PA_test <- xgb.DMatrix(data = PA_test_data$data,label = PA_test_data$label)
#计算混淆矩阵
predict_PA_test <- round(predict(bst_model2, newdata = PA_test))
PA_confuse_matrix_test <- table(PA_test_data$label, predict_PA_test, dnn=c("true","pre"))
PA_confuse_matrix_test_prop <- prop.table(PA_confuse_matrix_test,1)
PA_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(PA_test_data$label, predict_PA_test) #多分类ROC
xgboost_roc <- pROC::roc(PA_test_data$label, predict_PA_test)
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量ROC
# 计算ARI
adjustedRandIndex(predict_PA_test, PA_test_data$label)
umapplot(ds2,split.by = "conditions")
table(ds2$conditions)
pre
true 0 1 2 0 0.980360065 0.003273322 0.016366612 1 0.799516908 0.196859903 0.003623188 2 0.453004622 0.493066256 0.053929122 3 0.002762431 0.052486188 0.944751381 ## AC ->PA ARI 0.1797689 pre true 0 1 2 3 0 0.027107438 0.287272727 0.682644628 0.002975207 1 0.000349895 0.075227432 0.914975507 0.009447166 2 0.008130081 0.003252033 0.175609756 0.813008130
library(plotly)
plot_ly(type = "sankey", orientation = "h",
node = list(
label = c("PA_0", "PA_1", "PA_2", "AC_0","AC_1","AC_2","AC_3"),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = "black",
width = 1)),
link = list(
source = c(0,0,0,0,1,1,1,1,2,2,2,2),
target = c(3,4,5,6,3,4,5,6,3,4,5,6),
value = as.numeric(AC_confuse_matrix_test),
color = c("#66C2A5","#66C2A5","#66C2A5","#66C2A5","#FC8D62","#FC8D62","#FC8D62",
"#FC8D62","#8DA0CB","#8DA0CB","#8DA0CB","#8DA0CB")
))
AC_confuse_matrix_test
pre
true 0 1 2
0 1213 5 4
1 683 143 2
2 292 323 34
3 1 24 337
t(PA_confuse_matrix_test)
true
pre 0 1 2
0 73 3 4
1 853 198 3
2 2094 2634 108
3 5 23 500
# fig <- fig %>% layout(
# title = "Basic Sankey Diagram",
# font = list(
# size = 10 ))
#
umapplot(ds2_AC)
umapplot(ds2_PA)
umapplot(ds2,split.by = "conditions")
temp <- get_data_table(ds2_AC, highvar = F, type = "data")
AC_data <- matrix(data=0, nrow = length(AC_genes$Feature), ncol = length(colnames(temp)), byrow = FALSE, dimnames = list(AC_genes$Feature,colnames(temp)))
for(i in intersect(AC_genes$Feature, rownames(temp))){
AC_data[i,] <- temp[i,]
}
rm(temp)
AC_label <- as.numeric(as.character(Idents(ds2_AC)))
set.seed(7)
index <- c(1:dim(AC_data)[2]) %>% sample(ceiling(0.3*dim(AC_data)[2]), replace = F, prob = NULL)
colnames(AC_data) <- NULL
AC_train_data <- list(data = t(as(AC_data[,-index],"dgCMatrix")), label = AC_label[-index])
AC_test_data <- list(data = t(as(AC_data[,index],"dgCMatrix")), label = AC_label[index])
AC_train <- xgb.DMatrix(data = AC_train_data$data,label = AC_train_data$label)
AC_test <- xgb.DMatrix(data = AC_test_data$data,label = AC_test_data$label)
watchlist <- list(train = AC_train, eval = AC_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(ds2_AC))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, AC_train, nrounds = 200, watchlist, verbose = 0)
# 特征提取
importance <- xgb.importance(colnames(AC_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) #这个cluster和分类没有关系
multi_featureplot(head(importance,9)$Feature,ds2)
#混淆矩阵
predict_AC_test <- round(predict(bst_model, newdata = AC_test))
AC_confuse_matrix_test <- table(AC_test_data$label, predict_AC_test, dnn=c("true","pre"))
AC_confuse_matrix_test_prop <- prop.table(AC_confuse_matrix_test, 1)
AC_confuse_matrix_test_prop
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(AC_test_data$label, predict_AC_test) #多分类ROC
xgboost_roc <- pROC::roc(AC_test_data$label, predict_AC_test)
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量
ds1 <- ds1 %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.3)
umapplot(ds1)
ds1 <- RenameIdents(ds1,'0' = '0','1' ='0','2'='1','3'='2','4' = '3','5' = '1')
f("FBLN1",ds1)
ds1_markers <- FindAllMarkers(ds1,logfc.threshold = 0.5, min.diff.pct = 0.2)
temp <- get_data_table(ds1, highvar = F, type = "data")
ds1_data <- matrix(data=0, nrow = length(AC_genes$Feature), ncol = length(colnames(temp)), byrow = FALSE, dimnames = list(AC_genes$Feature,colnames(temp)))
for(i in intersect(AC_genes$Feature, rownames(temp))){
ds1_data[i,] <- temp[i,]
}
rm(temp)
ds1_label <- as.numeric(as.character(Idents(ds1)))
colnames(ds1_data) <- NULL
ds1_test_data <- list(data = t(as(ds1_data,"dgCMatrix")), label = ds1_label)
ds1_test <- xgb.DMatrix(data = ds1_test_data$data,label = ds1_test_data$label)
#计算混淆矩阵
predict_ds1_test <- round(predict(bst_model, newdata = ds1_test))
ds1_data_confuse_matrix_test <- table(ds1_test_data$label, predict_ds1_test, dnn=c("true","pre"))
ds1_data_confuse_matrix_test_prop <- prop.table(ds1_data_confuse_matrix_test,1)
ds1_data_confuse_matrix_test
ds1_data_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(ds1_test_data$label, predict_ds1_test) #多分类ROC
xgboost_roc <- pROC::roc(ds1_test_data$label, predict_ds1_test)
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量ROC
# 计算ARI
adjustedRandIndex(predict_ds1_test, ds1_test_data$label)
ds0 <- ds0 %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.1)
umapplot(ds0)
f("MYH11",ds0)
ds0_markers <- FindAllMarkers(ds0,logfc.threshold = 0.7, min.diff.pct = 0.2)
selected_features <- AC_genes$Feature
temp <- get_data_table(ds0, highvar = F, type = "data")
ds0_data <- matrix(data=0, nrow = length(selected_features), ncol = length(colnames(temp)), byrow = FALSE, dimnames = list(selected_features,colnames(temp)))
for(i in intersect(selected_features,rownames(temp))){
ds0_data[i,] <- temp[i,]
}
# rm(temp)
ds0_label <- as.numeric(as.character(Idents(ds0)))
colnames(ds0_data) <- NULL
ds0_test_data <- list(data = t(as(ds0_data,"dgCMatrix")), label = ds0_label)
ds0_test <- xgb.DMatrix(data = ds0_test_data$data,label = ds0_test_data$label)
#计算混淆矩阵
predict_ds0_test <- round(predict(bst_model, newdata = ds0_test))
ds0_data_confuse_matrix_test <- table(ds0_test_data$label, predict_ds0_test, dnn=c("true","pre"))
ds0_data_confuse_matrix_test_prop <- prop.table(ds0_data_confuse_matrix_test,1)
ds0_data_confuse_matrix_test
ds0_data_confuse_matrix_test_prop #分析发育轨迹
#ROC曲线
# xgboost_roc <- pROC::multiclass.roc(ds0_test_data$label, predict_ds0_test) #多分类ROC
xgboost_roc <- pROC::roc(ds0_test_data$label, predict_ds0_test)
plot(xgboost_roc, print.auc=TRUE, auc.polygon=TRUE,
grid=c(0.1, 0.2),grid.col=c("green", "red"),
max.auc.polygon=TRUE,auc.polygon.col="skyblue",
print.thres=TRUE,main='ROC curve') #前两个分量ROC
# 计算ARI
adjustedRandIndex(predict_ds0_test, ds0_test_data$label)
# load("./init.RData")
multi_featureplot(head(importance2,9)$Feature, ds2_AC)
multi_featureplot(head(importance2,9)$Feature, ds0)
multi_featureplot(head(importance2,9)$Feature, ds1)
f("MYH11", ds2_AC)
umapplot(ds0)
Idents(lym_ds2) <- lym_ds2$conditions
lym_ds2_AC <- subset(lym_ds2, idents = "AC")
lym_ds2_PA <- subset(lym_ds2, idents = "PA")
lym_ds2_AC <- lym_ds2_AC %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.2)
umapplot(lym_ds2_AC)
lym_ds2_PA <- lym_ds2_PA %>% FindNeighbors(dims = 1:20) %>% FindClusters(resolution = 0.2)
umapplot(lym_ds2_PA)
lym_PA_data <- get_data_table(lym_ds2_PA, highvar = F, type = "data")
lym_PA_label <- as.numeric(as.character(Idents(lym_ds2_PA)))
set.seed(7)
index <- c(1:dim(lym_PA_data)[2]) %>% sample(ceiling(0.3*dim(lym_PA_data)[2]), replace = F, prob = NULL)
colnames(lym_PA_data) <- NULL
lym_PA_train_data <- list(data = t(as(lym_PA_data[,-index],"dgCMatrix")), label = lym_PA_label[-index])
lym_PA_test_data <- list(data = t(as(lym_PA_data[,index],"dgCMatrix")), label = lym_PA_label[index])
# data(agaricus.train, plym_PAkage='xgboost')
lym_PA_train <- xgb.DMatrix(data = lym_PA_train_data$data,label = lym_PA_train_data$label)
lym_PA_test <- xgb.DMatrix(data = lym_PA_test_data$data,label = lym_PA_test_data$label)
watchlist <- list(train = lym_PA_train, eval = lym_PA_test)
xgb_param <- list(eta = 0.2, max_depth = 6,
subsample = 0.6, num_class = length(table(Idents(lym_ds2_PA))),
objective = "multi:softmax", eval_metric = 'mlogloss')
bst_model <- xgb.train(xgb_param, lym_PA_train, nrounds = 200, watchlist, verbose = 1)
[1] train-mlogloss:1.364683 eval-mlogloss:1.386518
[2] train-mlogloss:1.104948 eval-mlogloss:1.141078
[3] train-mlogloss:0.921658 eval-mlogloss:0.970349
[4] train-mlogloss:0.782067 eval-mlogloss:0.838498
[5] train-mlogloss:0.672917 eval-mlogloss:0.737674
[6] train-mlogloss:0.584400 eval-mlogloss:0.656063
[7] train-mlogloss:0.511279 eval-mlogloss:0.589930
[8] train-mlogloss:0.450917 eval-mlogloss:0.535140
[9] train-mlogloss:0.399006 eval-mlogloss:0.489075
[10] train-mlogloss:0.356262 eval-mlogloss:0.450757
[11] train-mlogloss:0.321021 eval-mlogloss:0.418092
[12] train-mlogloss:0.289741 eval-mlogloss:0.391501
[13] train-mlogloss:0.263458 eval-mlogloss:0.368993
[14] train-mlogloss:0.239869 eval-mlogloss:0.348346
[15] train-mlogloss:0.219948 eval-mlogloss:0.330793
[16] train-mlogloss:0.201978 eval-mlogloss:0.316338
[17] train-mlogloss:0.186481 eval-mlogloss:0.302551
[18] train-mlogloss:0.173125 eval-mlogloss:0.292528
[19] train-mlogloss:0.160651 eval-mlogloss:0.283499
[20] train-mlogloss:0.149984 eval-mlogloss:0.274891
[21] train-mlogloss:0.141659 eval-mlogloss:0.268080
[22] train-mlogloss:0.133263 eval-mlogloss:0.261263
[23] train-mlogloss:0.125407 eval-mlogloss:0.255525
[24] train-mlogloss:0.117925 eval-mlogloss:0.250536
[25] train-mlogloss:0.111371 eval-mlogloss:0.246171
[26] train-mlogloss:0.105522 eval-mlogloss:0.242234
[27] train-mlogloss:0.100195 eval-mlogloss:0.238797
[28] train-mlogloss:0.095323 eval-mlogloss:0.235266
[29] train-mlogloss:0.090141 eval-mlogloss:0.232179
[30] train-mlogloss:0.086082 eval-mlogloss:0.229155
[31] train-mlogloss:0.081895 eval-mlogloss:0.225177
[32] train-mlogloss:0.078219 eval-mlogloss:0.223170
[33] train-mlogloss:0.074680 eval-mlogloss:0.221246
[34] train-mlogloss:0.070858 eval-mlogloss:0.219093
[35] train-mlogloss:0.067738 eval-mlogloss:0.217748
[36] train-mlogloss:0.065098 eval-mlogloss:0.216182
[37] train-mlogloss:0.062028 eval-mlogloss:0.214797
[38] train-mlogloss:0.059496 eval-mlogloss:0.213793
[39] train-mlogloss:0.056910 eval-mlogloss:0.212119
[40] train-mlogloss:0.054699 eval-mlogloss:0.210574
[41] train-mlogloss:0.052492 eval-mlogloss:0.209919
[42] train-mlogloss:0.050442 eval-mlogloss:0.209348
[43] train-mlogloss:0.048298 eval-mlogloss:0.208500
[44] train-mlogloss:0.046239 eval-mlogloss:0.207525
[45] train-mlogloss:0.043837 eval-mlogloss:0.206795
[46] train-mlogloss:0.041920 eval-mlogloss:0.206059
[47] train-mlogloss:0.040478 eval-mlogloss:0.205698
[48] train-mlogloss:0.038953 eval-mlogloss:0.204660
[49] train-mlogloss:0.037501 eval-mlogloss:0.204111
[50] train-mlogloss:0.035999 eval-mlogloss:0.202725
[51] train-mlogloss:0.034733 eval-mlogloss:0.202490
[52] train-mlogloss:0.033403 eval-mlogloss:0.202015
[53] train-mlogloss:0.032222 eval-mlogloss:0.201200
[54] train-mlogloss:0.031079 eval-mlogloss:0.200641
[55] train-mlogloss:0.029928 eval-mlogloss:0.200692
[56] train-mlogloss:0.028919 eval-mlogloss:0.200172
[57] train-mlogloss:0.027901 eval-mlogloss:0.199742
[58] train-mlogloss:0.026743 eval-mlogloss:0.199245
[59] train-mlogloss:0.025905 eval-mlogloss:0.198625
[60] train-mlogloss:0.025117 eval-mlogloss:0.198325
[61] train-mlogloss:0.024466 eval-mlogloss:0.198372
[62] train-mlogloss:0.023671 eval-mlogloss:0.198038
[63] train-mlogloss:0.022920 eval-mlogloss:0.197873
[64] train-mlogloss:0.022205 eval-mlogloss:0.197768
[65] train-mlogloss:0.021412 eval-mlogloss:0.197384
[66] train-mlogloss:0.020720 eval-mlogloss:0.197906
[67] train-mlogloss:0.020052 eval-mlogloss:0.197524
[68] train-mlogloss:0.019477 eval-mlogloss:0.197668
[69] train-mlogloss:0.018844 eval-mlogloss:0.197758
[70] train-mlogloss:0.018307 eval-mlogloss:0.197885
[71] train-mlogloss:0.017821 eval-mlogloss:0.197590
[72] train-mlogloss:0.017313 eval-mlogloss:0.197406
[73] train-mlogloss:0.016777 eval-mlogloss:0.197523
[74] train-mlogloss:0.016241 eval-mlogloss:0.197133
[75] train-mlogloss:0.015771 eval-mlogloss:0.197171
[76] train-mlogloss:0.015318 eval-mlogloss:0.197467
[77] train-mlogloss:0.014870 eval-mlogloss:0.197440
[78] train-mlogloss:0.014466 eval-mlogloss:0.197473
[79] train-mlogloss:0.014131 eval-mlogloss:0.197731
[80] train-mlogloss:0.013809 eval-mlogloss:0.197644
[81] train-mlogloss:0.013499 eval-mlogloss:0.197318
[82] train-mlogloss:0.013141 eval-mlogloss:0.197589
[83] train-mlogloss:0.012801 eval-mlogloss:0.197747
[84] train-mlogloss:0.012491 eval-mlogloss:0.197841
[85] train-mlogloss:0.012179 eval-mlogloss:0.197636
[86] train-mlogloss:0.011924 eval-mlogloss:0.197332
[87] train-mlogloss:0.011616 eval-mlogloss:0.197180
[88] train-mlogloss:0.011380 eval-mlogloss:0.197222
[89] train-mlogloss:0.011099 eval-mlogloss:0.197491
[90] train-mlogloss:0.010876 eval-mlogloss:0.197282
[91] train-mlogloss:0.010589 eval-mlogloss:0.197249
[92] train-mlogloss:0.010349 eval-mlogloss:0.197506
[93] train-mlogloss:0.010140 eval-mlogloss:0.197970
[94] train-mlogloss:0.009939 eval-mlogloss:0.198141
[95] train-mlogloss:0.009692 eval-mlogloss:0.198471
[96] train-mlogloss:0.009481 eval-mlogloss:0.198527
[97] train-mlogloss:0.009279 eval-mlogloss:0.198439
[98] train-mlogloss:0.009096 eval-mlogloss:0.198074
[99] train-mlogloss:0.008897 eval-mlogloss:0.198102
[100] train-mlogloss:0.008731 eval-mlogloss:0.197824
[101] train-mlogloss:0.008556 eval-mlogloss:0.197866
[102] train-mlogloss:0.008383 eval-mlogloss:0.197806
[103] train-mlogloss:0.008228 eval-mlogloss:0.197520
[104] train-mlogloss:0.008049 eval-mlogloss:0.197788
[105] train-mlogloss:0.007900 eval-mlogloss:0.197800
[106] train-mlogloss:0.007729 eval-mlogloss:0.198120
[107] train-mlogloss:0.007563 eval-mlogloss:0.198505
[108] train-mlogloss:0.007429 eval-mlogloss:0.198300
[109] train-mlogloss:0.007293 eval-mlogloss:0.198239
[110] train-mlogloss:0.007160 eval-mlogloss:0.198536
[111] train-mlogloss:0.007041 eval-mlogloss:0.198668
[112] train-mlogloss:0.006910 eval-mlogloss:0.198853
[113] train-mlogloss:0.006797 eval-mlogloss:0.199206
[114] train-mlogloss:0.006663 eval-mlogloss:0.199274
[115] train-mlogloss:0.006545 eval-mlogloss:0.199534
[116] train-mlogloss:0.006447 eval-mlogloss:0.199778
[117] train-mlogloss:0.006341 eval-mlogloss:0.199769
[118] train-mlogloss:0.006230 eval-mlogloss:0.200434
[119] train-mlogloss:0.006126 eval-mlogloss:0.200316
[120] train-mlogloss:0.006018 eval-mlogloss:0.200517
[121] train-mlogloss:0.005931 eval-mlogloss:0.200513
[122] train-mlogloss:0.005853 eval-mlogloss:0.200742
[123] train-mlogloss:0.005760 eval-mlogloss:0.200660
[124] train-mlogloss:0.005664 eval-mlogloss:0.200726
[125] train-mlogloss:0.005584 eval-mlogloss:0.200510
[126] train-mlogloss:0.005494 eval-mlogloss:0.200713
[127] train-mlogloss:0.005411 eval-mlogloss:0.200679
[128] train-mlogloss:0.005332 eval-mlogloss:0.200700
[129] train-mlogloss:0.005252 eval-mlogloss:0.201104
[130] train-mlogloss:0.005167 eval-mlogloss:0.201229
[131] train-mlogloss:0.005091 eval-mlogloss:0.201343
[132] train-mlogloss:0.005018 eval-mlogloss:0.201516
[133] train-mlogloss:0.004960 eval-mlogloss:0.201438
[134] train-mlogloss:0.004881 eval-mlogloss:0.201630
[135] train-mlogloss:0.004811 eval-mlogloss:0.201697
[136] train-mlogloss:0.004753 eval-mlogloss:0.202074
[137] train-mlogloss:0.004691 eval-mlogloss:0.202327
[138] train-mlogloss:0.004637 eval-mlogloss:0.202381
[139] train-mlogloss:0.004581 eval-mlogloss:0.202340
[140] train-mlogloss:0.004514 eval-mlogloss:0.202584
[141] train-mlogloss:0.004456 eval-mlogloss:0.202810
[142] train-mlogloss:0.004397 eval-mlogloss:0.202952
[143] train-mlogloss:0.004349 eval-mlogloss:0.202689
[144] train-mlogloss:0.004290 eval-mlogloss:0.202738
[145] train-mlogloss:0.004235 eval-mlogloss:0.202947
[146] train-mlogloss:0.004182 eval-mlogloss:0.202993
[147] train-mlogloss:0.004132 eval-mlogloss:0.203193
[148] train-mlogloss:0.004081 eval-mlogloss:0.203205
[149] train-mlogloss:0.004038 eval-mlogloss:0.203326
[150] train-mlogloss:0.003995 eval-mlogloss:0.203395
[151] train-mlogloss:0.003952 eval-mlogloss:0.203801
[152] train-mlogloss:0.003903 eval-mlogloss:0.203770
[153] train-mlogloss:0.003862 eval-mlogloss:0.203783
[154] train-mlogloss:0.003817 eval-mlogloss:0.203899
[155] train-mlogloss:0.003772 eval-mlogloss:0.203829
[156] train-mlogloss:0.003730 eval-mlogloss:0.203774
[157] train-mlogloss:0.003685 eval-mlogloss:0.204061
[158] train-mlogloss:0.003648 eval-mlogloss:0.203985
[159] train-mlogloss:0.003611 eval-mlogloss:0.204184
[160] train-mlogloss:0.003573 eval-mlogloss:0.204398
[161] train-mlogloss:0.003536 eval-mlogloss:0.204404
[162] train-mlogloss:0.003499 eval-mlogloss:0.204376
[163] train-mlogloss:0.003462 eval-mlogloss:0.204346
[164] train-mlogloss:0.003423 eval-mlogloss:0.204499
[165] train-mlogloss:0.003388 eval-mlogloss:0.204599
[166] train-mlogloss:0.003352 eval-mlogloss:0.204687
[167] train-mlogloss:0.003317 eval-mlogloss:0.204729
[168] train-mlogloss:0.003287 eval-mlogloss:0.204787
[169] train-mlogloss:0.003257 eval-mlogloss:0.204873
[170] train-mlogloss:0.003225 eval-mlogloss:0.204902
[171] train-mlogloss:0.003192 eval-mlogloss:0.205109
[172] train-mlogloss:0.003162 eval-mlogloss:0.205125
[173] train-mlogloss:0.003137 eval-mlogloss:0.204954
[174] train-mlogloss:0.003108 eval-mlogloss:0.205048
[175] train-mlogloss:0.003084 eval-mlogloss:0.205078
[176] train-mlogloss:0.003057 eval-mlogloss:0.205081
[177] train-mlogloss:0.003031 eval-mlogloss:0.205129
[178] train-mlogloss:0.003005 eval-mlogloss:0.205052
[179] train-mlogloss:0.002984 eval-mlogloss:0.205136
[180] train-mlogloss:0.002961 eval-mlogloss:0.205221
[181] train-mlogloss:0.002938 eval-mlogloss:0.205245
[182] train-mlogloss:0.002912 eval-mlogloss:0.205307
[183] train-mlogloss:0.002887 eval-mlogloss:0.205428
[184] train-mlogloss:0.002868 eval-mlogloss:0.205573
[185] train-mlogloss:0.002845 eval-mlogloss:0.205670
[186] train-mlogloss:0.002821 eval-mlogloss:0.205713
[187] train-mlogloss:0.002799 eval-mlogloss:0.205551
[188] train-mlogloss:0.002778 eval-mlogloss:0.205443
[189] train-mlogloss:0.002753 eval-mlogloss:0.205433
[190] train-mlogloss:0.002733 eval-mlogloss:0.205543
[191] train-mlogloss:0.002711 eval-mlogloss:0.205773
[192] train-mlogloss:0.002693 eval-mlogloss:0.205855
[193] train-mlogloss:0.002672 eval-mlogloss:0.206061
[194] train-mlogloss:0.002650 eval-mlogloss:0.206295
[195] train-mlogloss:0.002633 eval-mlogloss:0.206343
[196] train-mlogloss:0.002613 eval-mlogloss:0.206104
[197] train-mlogloss:0.002595 eval-mlogloss:0.206119
[198] train-mlogloss:0.002578 eval-mlogloss:0.206140
[199] train-mlogloss:0.002561 eval-mlogloss:0.206157
[200] train-mlogloss:0.002544 eval-mlogloss:0.206318
# 特征提取
importance <- xgb.importance(colnames(lym_PA_train), model = bst_model)
head(importance)
xgb.ggplot.importance(head(importance,20),n_clusters = 1) #这个cluster和分类没有关系
multi_featureplot(head(importance,9)$Feature,lym_ds2_PA)
lym_PA_genes <- head(importance, 500) ##选择top500
#混淆矩阵
predict_lym_PA_test <- round(predict(bst_model, newdata = lym_PA_test))
lym_PA_confuse_matrix_test <- table(lym_PA_test_data$label, predict_lym_PA_test, dnn=c("true","pre"))
lym_PA_confuse_matrix_test_prop <- prop.table(lym_PA_confuse_matrix_test, 1)
lym_PA_confuse_matrix_test
pre
true 0 1 2 3 4 5
0 1504 53 0 29 0 1
1 69 1076 16 0 8 1
2 0 24 538 0 8 0
3 57 1 0 329 0 0
4 0 17 6 0 287 0
5 4 4 4 0 0 88
lym_AC_data <- get_data_table(lym_ds2_AC, highvar = F, type = "data")
lym_AC_label <- as.numeric(as.character(Idents(lym_ds2_AC)))
colnames(lym_AC_data) <- NULL
lym_AC_test_data <- list(data = t(as(lym_AC_data,"dgCMatrix")), label = lym_AC_label)
lym_AC_test <- xgb.DMatrix(data = lym_AC_test_data$data,label = lym_AC_test_data$label)
multi_featureplot(head(importance,9)$Feature,lym_ds2_AC)
#混淆矩阵
predict_lym_AC_test <- round(predict(bst_model, newdata = lym_AC_test))
lym_AC_confuse_matrix_test <- table(lym_AC_test_data$label, predict_lym_AC_test, dnn=c("true","pre"))
lym_AC_confuse_matrix_test_prop <- prop.table(lym_AC_confuse_matrix_test, 1)
lym_AC_confuse_matrix_test
pre
true 0 1 2 3 4 5
0 1010 6 0 255 0 0
1 0 21 625 0 50 0
2 68 433 17 2 0 2
3 0 20 0 0 458 0
4 0 0 0 0 0 23
adjustedRandIndex(predict_lym_AC_test, lym_AC_test_data$label)
[1] 0.7227654
labels <- lapply(levels(Idents(lym_ds2_PA)), paste0, "_PA") %>% as.character()
labels2 <- lapply(levels(Idents(lym_ds2_AC)), paste0, "_AC") %>% as.character()
sources <- rep(0:5, each = 5) #注意这里的each和times的区别
colors <- rep(colors_list[1:6], each = 5)
targets <- rep(6:10, times = 6)
plot_ly(type = "sankey", orientation = "h",
node = list(
label = c(labels,labels2),
color = colors_list, pad = 15, thickness = 30,
line = list(
color = "black",
width = 1)),
link = list(
source = sources,
target = targets,
value = as.numeric(lym_AC_confuse_matrix_test),
color = colors
))
umapplot(lym_ds2_AC)
umapplot(lym_ds2_PA)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.